/* Author:  Amanda McFarland (amcfarland@frbchi.org)
 * Created: 4/8/2020
 * Purpose: Creates state-level event study of consumer spending responses to stay-at-home orders.
 * Differences: makes second measure per internal N
 * Edited: Diane, 4/27/2020
 */


 
/* --- File setup --- */
clear all 
cap log close
set more off
set matsize 11000


*Set sub-directory paths--same for all users:

global unacast  "${machine}/data/clean/unacast"
global stay_at_home "${machine}/data/clean/stay_at_home"
global womply "${machine}/data/clean/womply"
global second_measure "${machine}/data/clean/second_measure"
global nyt_cases "${machine}/data/clean/nyt_cases"
global census "${machine}/data/clean/census"
global trump "${machine}/data/clean/trump"
global acs "${machine}/data/clean/acs_2018"
global usda "${machine}/data/clean/usda_ers"
global tables "${machine}/tables/event_time/secondmeasure"
global figures "${machine}/figures/event_time/secondmeasure"

global outdata "${machine}/data/clean/"

* switches
local p1 = 0 			// run sector-level analysis
local samp all 		// which sample to run on
	// all :	run on all states
	// small :	run on states which passed stay-at-home orders all at once 

* Date
local date=subinstr(c(current_date), " ", "_", .)
set obs 1 
gen date="`date'"
split date, parse("_")
if date1=="" {
	local d = 2
	local m = 3
	local y = 4
}
else if date1!="" {
	local d = 1
	local m = 2
	local y = 3
}
replace date`m' = "january" if date`m'=="Jan"
replace date`m' = "february" if date`m'=="Feb"
replace date`m' = "march" if date`m'=="Mar"
replace date`m' = "april" if date`m'=="Apr"
replace date`m' = "may" if date`m'=="May"
replace date`m' = "june" if date`m'=="Jun"
replace date`m' = "july" if date`m'=="Jul"
replace date`m' = "august" if date`m'=="Aug"
replace date`m' = "september" if date`m'=="Sep"
replace date`m' = "october" if date`m'=="Oct"
replace date`m' = "november" if date`m'=="Nov"
replace date`m' = "december" if date`m'=="Dec"
cap replace date=date`d'+date`m'+date`y'
local date=date[1]
display as error "Today's date: `date'"


/* --- Prepare second measure data --- */
	* load data (only data from March)
clear
	pwd
	
	use "${second_measure}/sector_geo_daily_sales.dta"  if date>=date("20200220", "YMD"), clear
	
	* drop useless variables
	drop year naics2
	
	*generate dataset with no holes, merge back in data
	*not all states report all sectors.

	
	tab date
	gen date_no=date
	tab date_no
	
	tempfile state_sector_date
	preserve
	keep state sector
	duplicates drop
	sort state sector 
	expand 56
	bysort state sector: gen date=_n
	replace date=date+21964
	format date %td
	tab date
	save `state_sector_date', replace
	restore
	
	merge 1:1 state sector date using `state_sector_date',nogen
	replace sales_sector=0 if sales_sector==.
	
	* fill in missing state FIPS data
	bys state: egen statefip = min(state_fips)
	replace state_fips = statefip
	drop statefip 

	* save tempfile
	tempfile second_measure
	sa "`second_measure'", replace


/* --- Prepare USDA state-level characteristics data --- */
	* load data
	use "${usda}/county_information.dta" if county_fips<57000, clear
	
	* pull state FIPS out of county FIPS
	tostring(county_fips), gen(state_fips)
	replace state_fips = substr(state_fips, 1, strlen(state_fips)-3)
	destring(state_fips), replace
	
	* collapse population to state level
	collapse (sum) pop_estimate_2018, by(state_fips)
	

/* --- Merge data together --- */
	* stay at home orders
	merge 1:1 state_fips using "${stay_at_home}/stay_at_home_state_level_AM.dta", ///
	  keepusing(state_fips state half_wtd all_at_once) assert(3) nogen
	
	* drop DC
	drop if state_fips==11


	* second measure data
	merge 1:m state_fips using "`second_measure'", ///
	  assert(3) nogen
	  
	* sort & order data for readability
	sort state_fips sector date
	order state_fips state pop_estimate_2018 half_wtd date sales_sector
	
	
/* --- Create per capita measures --- */
* merge in average customers data 
* see do file - summary_second_measure.do to see how the average customers variable is created 
merge m:1 state_fips using "${second_measure}/avg_customers.dta" , assert(3) nogen 
ren avg_customers customers_earlymarch 
gen sales_sector_percap = sales_sector / customers_earlymarch
gen log_sales_sector_percap = log(sales_sector_percap + 1) 

*	gen sales_sector_percap = sales_sector / customers_sector
*	gen sales_sector_percap = sales_sector / pop_estimate_2018
	
	
tempfile state_deaths
preserve 
use  "${nyt_cases}/nyt_cases_04_17.dta", clear
tostring county_fips, usedisplayformat replace
gen state_fips=substr(county_fips,1,2)
destring state_fips, replace
collapse (sum) cases deaths, by(state_fips date)
save `state_deaths',replace
restore

**ARE RI and WV not in data?
merge m:1 state_fips date using `state_deaths', keep(match master)
**missings in the cases/deaths are zeroes
replace cases=0 if cases==.
replace deaths=0 if deaths==.

	*drop counties with no stay at home order 
local medvar "case death"

**creates a variable with the first case or death
foreach m of local medvar {
gen first`m'_t=.
sort state_fips
bysort state_fips: replace first`m'_t=1 if `m's[_n]!=0 & `m's[_n-1]==0
replace first`m'_t=date*first`m'_t
bysort state_fips: egen first`m'_tt=min(first`m'_t)
bysort state_fips: egen first`m'=mean(first`m'_tt)
format first`m' %td
}


*Save copy of dataset for future analyses
preserve

gen sectorstub = substr(sector,1,8)

gen newstrvar = "" 

gen length = length(sectorstub) 
su length, meanonly 

qui forval j = 1/`r(max)' { 
     replace newstrvar = newstrvar + substr(sectorstub, `j', 1) if inrange(substr(sectorstub, `j', 1), "a", "z") 
}
replace sectorstub = newstrvar

rename half_wtd effective_date
gen event_time = date - effective_date + 1

drop _merge

keep sales_sector_percap date event_time pop_estimate_2018 state_fips sectorstub effective_date

gen logsales_sector_percap = log(sales_sector_percap+1)

rename sales_sector_percap sales_sector_percap_
rename logsales_sector_percap logsales_sector_percap_

reshape wide sales_sector_percap_ logsales_sector_percap_, i(date effective_date event_time pop_estimate_2018 state_fips) j(sectorstub) string

duplicates report date state_fips

save "${outdata}/statedata_foreventtime", replace

restore



*foreach e in firstdeath half_wtd {
tempfile half_wtd
tempfile firstdeath
foreach e in half_wtd firstdeath  {
	
	preserve
	
/* --- Create event time & set window --- */
	* create event time
	gen event_time = date - `e' + 1
	*keep if event_time>=-10 & event_time<=10
	replace event_time = -11 if event_time<-11
	replace event_time = 11 if event_time>11

	*drop states that don't have at least event time =0 and event time=1
	bys state_fips: egen max_post = max(event_time)
	keep if max_post>=5 & max_post!=.

	* shift event time to avoid negatives
	replace event_time = event_time + 11
		
/* --- Make sample selection --- */
	* if samp = all, keep all states had at least 5 days post 
	* if samp = small, keep only states which:
		// 1. passed stay-at-home orders all at once AND
		// 2. had  at least 5 days post 
	if ("`samp'"=="small") {
		keep if all_at_once==1
	}		
		
		
	* get number of time periods & number of sectors
	tab event_time
	local max_time = r(r)
	tab sector
	local max_sector = r(r)
	
	
/* --- Run regression & store estimates for each sector --- */
	* set omitted category = -5
	fvset base 6 event_time
	
	* loop through sectors
	levelsof sector, local(sectors) 
	local s = 1
	tempfile master
	sa "`master'", replace
		
	foreach sector in `sectors' {
		use "`master'", clear 
		
		* run regreesion
		tab date_no
		quietly reghdfe sales_sector_percap i.event_time i. date if sector=="`sector'" [aweight=pop_estimate_2018], ///
		  absorb( state_fips) cluster(state_fips date)

		* save estimates
		matrix b=e(b)
		matrix V=e(V)
		forval i=1/`max_time' {
			local beta_`i'_`s' = b[1,`i']
			local se_`i'_`s' = sqrt(V[`i',`i'])
		}
		
		* store sector name
		local sector_name_`s' "`sector'"
		
		* get baseline 
		collapse (mean) baseline = sales_sector_percap if event_time==10 & e(sample)==1 [aweight=pop_estimate_2018]
		local baseline_`s' = baseline[1]
		
		* advance sector counter
		local s = `s'+1
	}
	
	
	
	

	
/* --- Create new dataset of estimates for graphing --- */
	* create dataset to fill
	clear
	local new_num = `max_sector'*`max_time'
	set obs `new_num'
	gen event_time = .
	gen beta = .
	gen se = .
	gen sector_name = ""
	gen sector_code = .
	gen baseline = .
	
	* replace beta and sd estimates
	local i = 1
	forval s=1/`max_sector' {
		forval t=1/`max_time' {
			display `max_time'
			display "`max_time'"
			replace event_time = `t'-12 if _n==`i'
			replace sector_name = proper("`sector_name_`s''") if _n==`i'
			replace sector_code = `s' if _n==`i'
			replace beta = `beta_`t'_`s'' if _n==`i'
			replace se = `se_`t'_`s'' if _n==`i'
			replace baseline = `baseline_`s'' if _n==`i'
			local i = `i'+1
		}
	}
	
	* correct se of omitted variables
	replace se = . if se==0
	
	* create error bands
	gen highci = beta+(1.96*se)
	gen lowci = beta-(1.96*se)

	
	

/* -- Plot the estimates --- */	
	* get titles for sectors
	local t_1 = "Accomodation & food services"
	local t_2 = "Amazon"
	local t_3 = "Arts, entertainment & recreation"
	local t_4 = "Construction"
	local t_5 = "Educational services"
	local t_6 = "Finance & insurance"
	local t_7 = "Food delivery"
	local t_8 = "General/wholesale retail"
	local t_9 = "Grocery stores"
	local t_10 = "Health care & social assistance"
	local t_11 = "Hotel booking"
	local t_12 = "Information"
	local t_13 = "Manufacturing"
	local t_14 = "Non-retail"
	local t_15 = "Other services (except public administration)"
	local t_16 = "All"
	local t_17 = "Pharmacies"
	local t_18 = "Professional, scientific & technical services"
	local t_19 = "Real estate, rental & leasing"
	local t_20 = "Restaurants"
	local t_21 = "Retail"
	local t_22 = "Retail trade"
	local t_23 = "Transportation"
	local t_24 = "Utilities"
	local t_25 = "Wholesale trade"
	
	* short titles for graph file names
	local t_1s = "accomodation"
	local t_2s = "amazon"
	local t_3s = "entertainment"
	local t_4s = "construction"
	local t_5s = "education"
	local t_6s = "finance"
	local t_7s = "food_delivery"
	local t_8s = "retail_gen"
	local t_9s = "grocery"
	local t_10s = "health_care"
	local t_11s = "hotel"
	local t_12s = "information"
	local t_13s = "manufacturing"
	local t_14s = "non_retail"
	local t_15s = "other"
	local t_16s = "all"
	local t_17s = "pharmacies"
	local t_18s = "professional"
	local t_19s = "real_estate"
	local t_20s = "restaurants"
	local t_21s = "retail"
	local t_22s = "retail_trade"
	local t_23s = "transport"
	local t_24s = "utilities"
	local t_25s = "wholesale_trade"
	
	**makes confidence interval meet at zero for omitted category
**alternative: just have it not plotted at all 
replace se =0 if event_time==-5
replace highci =0 if event_time==-5
replace lowci =0 if event_time==-5

keep if inrange(event_time,-10,10)
	
	* loop through sectors 
	levelsof sector_code, local(sectors)
	foreach s in `sectors' {
		* plot
		graph twoway ///
		  (scatter beta event_time if  sector_code==`s',  c(l)  msize(vsmall) color(gs7)) ///
		  (rarea highci lowci event_time if sector_code==`s', c(l)  msize(vsmall) color(gs7%15) 	///
		  legend(off) c(l)  xline(0,lcolor(gs8)) yline(0,lcolor(gs8)) scale(*1.5) scheme(s1color) ///
			ytitle("Spending per consumer ($)")  xtitle("Days relative to stay-at-home order") ///
		  title("`t_`s''", span linegap(0.01) size(medsmall))) 	
		
		* save
		graph export "${figures}/`t_`s's'_event_`samp'_`e'.pdf", as(pdf) replace
	
	* End of sectors loop
	}
	
	rename beta beta_`e'
	rename highci highci_`e'
	rename lowci lowci_`e'

	save ``e''
	
	export  delimited  using "${tables}/secondmeasure_estimates_`samp'_`e'.csv", replace
	
	restore
	}
	
	
	
	preserve 
	use `firstdeath', clear
	merge 1:1 event_time sector_code using `half_wtd', nogen

	keep if inrange(event_time,-10,10)

	* loop through sectors 
	levelsof sector_code, local(sectors)
	foreach s in `sectors' {
		* plot
				
		graph twoway ///
		 (scatter beta_half_wtd event_time  if  sector_code==`s', scale(*1.4) c(l)  msize(vsmall) color(gs7) lpattern(shortdash) ) ///
		 (rarea highci_half_wtd lowci_half_wtd event_time  if  sector_code==`s',  color(gs7%15) xline(0,lcolor(gs8)) yline(0,lcolor(gs8))  ytitle("Spending per consumer ($)") ysc(titleg(0.2cm)) xtitle("Days relative to event") scheme(s1color) ti("", span linegap(0.01) size(medsmall))) ///
		  (scatter beta_firstdeath event_time  if  sector_code==`s' , c(l)  msize(vsmall) color(gs11)) ///
		 (rarea  highci_firstdeath lowci_firstdeath event_time  if  sector_code==`s', c(l) clpat(tight_dot) lw(medthick) ///
		 msize(vsmall) color(gs11%15)   legend(order(1 "Event: stay-at-home" 3 "Event: 1st death") row(1) region(lcolor(none)) ) ///
		    title("`t_`s''", span linegap(0.01) size(medsmall))) 	

		 graph export "${figures}/`t_`s's'_byevent_`samp'.pdf", replace
	
	* End of sectors loop
	}
	restore
	
* added by Aastha - 3/31/2021 
* repeat the above loop but for log_sales_sector_percap 	
*foreach e in firstdeath half_wtd {
tempfile half_wtd
tempfile firstdeath
foreach e in half_wtd firstdeath  {
	
	preserve
	
/* --- Create event time & set window --- */
	* create event time
	gen event_time = date - `e' + 1
	*keep if event_time>=-10 & event_time<=10
	replace event_time = -11 if event_time<-11
	replace event_time = 11 if event_time>11

	*drop states that don't have at least event time =0 and event time=1
	bys state_fips: egen max_post = max(event_time)
	keep if max_post>=5 & max_post!=.

	* shift event time to avoid negatives
	replace event_time = event_time + 11
		
/* --- Make sample selection --- */
	* if samp = all, keep all states had at least 5 days post 
	* if samp = small, keep only states which:
		// 1. passed stay-at-home orders all at once AND
		// 2. had  at least 5 days post 
	if ("`samp'"=="small") {
		keep if all_at_once==1
	}		
		
		
	* get number of time periods & number of sectors
	tab event_time
	local max_time = r(r)
	tab sector
	local max_sector = r(r)
	
	
/* --- Run regression & store estimates for each sector --- */
	* set omitted category = -5
	fvset base 6 event_time
	
	* loop through sectors
	levelsof sector, local(sectors) 
	local s = 1
	tempfile master
	sa "`master'", replace
		
	foreach sector in `sectors' {
		use "`master'", clear 
		
		* run regreesion
		tab date_no
		quietly reghdfe log_sales_sector_percap i.event_time i. date if sector=="`sector'" [aweight=pop_estimate_2018], ///
		  absorb( state_fips) cluster(state_fips date)

		* save estimates
		matrix b=e(b)
		matrix V=e(V)
		forval i=1/`max_time' {
			local beta_`i'_`s' = b[1,`i']
			local se_`i'_`s' = sqrt(V[`i',`i'])
		}
		
		* store sector name
		local sector_name_`s' "`sector'"
		
		* get baseline 
		collapse (mean) baseline = sales_sector_percap if event_time==10 & e(sample)==1 [aweight=pop_estimate_2018]
		local baseline_`s' = baseline[1]
		
		* advance sector counter
		local s = `s'+1
	}
	
	
	
	

	
/* --- Create new dataset of estimates for graphing --- */
	* create dataset to fill
	clear
	local new_num = `max_sector'*`max_time'
	set obs `new_num'
	gen event_time = .
	gen beta = .
	gen se = .
	gen sector_name = ""
	gen sector_code = .
	gen baseline = .
	
	* replace beta and sd estimates
	local i = 1
	forval s=1/`max_sector' {
		forval t=1/`max_time' {
			display `max_time'
			display "`max_time'"
			replace event_time = `t'-12 if _n==`i'
			replace sector_name = proper("`sector_name_`s''") if _n==`i'
			replace sector_code = `s' if _n==`i'
			replace beta = `beta_`t'_`s'' if _n==`i'
			replace se = `se_`t'_`s'' if _n==`i'
			replace baseline = `baseline_`s'' if _n==`i'
			local i = `i'+1
		}
	}
	
	* correct se of omitted variables
	replace se = . if se==0
	
	* create error bands
	gen highci = beta+(1.96*se)
	gen lowci = beta-(1.96*se)

	
	

/* -- Plot the estimates --- */	
	* get titles for sectors
	local t_1 = "Accomodation & food services"
	local t_2 = "Amazon"
	local t_3 = "Arts, entertainment & recreation"
	local t_4 = "Construction"
	local t_5 = "Educational services"
	local t_6 = "Finance & insurance"
	local t_7 = "Food delivery"
	local t_8 = "General/wholesale retail"
	local t_9 = "Grocery stores"
	local t_10 = "Health care & social assistance"
	local t_11 = "Hotel booking"
	local t_12 = "Information"
	local t_13 = "Manufacturing"
	local t_14 = "Non-retail"
	local t_15 = "Other services (except public administration)"
	local t_16 = "All"
	local t_17 = "Pharmacies"
	local t_18 = "Professional, scientific & technical services"
	local t_19 = "Real estate, rental & leasing"
	local t_20 = "Restaurants"
	local t_21 = "Retail"
	local t_22 = "Retail trade"
	local t_23 = "Transportation"
	local t_24 = "Utilities"
	local t_25 = "Wholesale trade"
	
	* short titles for graph file names
	local t_1s = "accomodation"
	local t_2s = "amazon"
	local t_3s = "entertainment"
	local t_4s = "construction"
	local t_5s = "education"
	local t_6s = "finance"
	local t_7s = "food_delivery"
	local t_8s = "retail_gen"
	local t_9s = "grocery"
	local t_10s = "health_care"
	local t_11s = "hotel"
	local t_12s = "information"
	local t_13s = "manufacturing"
	local t_14s = "non_retail"
	local t_15s = "other"
	local t_16s = "all"
	local t_17s = "pharmacies"
	local t_18s = "professional"
	local t_19s = "real_estate"
	local t_20s = "restaurants"
	local t_21s = "retail"
	local t_22s = "retail_trade"
	local t_23s = "transport"
	local t_24s = "utilities"
	local t_25s = "wholesale_trade"
	
	**makes confidence interval meet at zero for omitted category
**alternative: just have it not plotted at all 
replace se =0 if event_time==-5
replace highci =0 if event_time==-5
replace lowci =0 if event_time==-5

keep if inrange(event_time,-10,10)
	
	* loop through sectors 
	levelsof sector_code, local(sectors)
	foreach s in `sectors' {
		* plot
		graph twoway ///
		  (scatter beta event_time if  sector_code==`s',  c(l)  msize(vsmall) color(gs7)) ///
		  (rarea highci lowci event_time if sector_code==`s', c(l)  msize(vsmall) color(gs7%15) 	///
		  legend(off) c(l)  xline(0,lcolor(gs8)) yline(0,lcolor(gs8)) scale(*1.5) scheme(s1color) ///
			ytitle("Log Spending per consumer ($)")  xtitle("Days relative to stay-at-home order") ///
		  title("`t_`s''", span linegap(0.01) size(medsmall))) 	
		
		* save
		graph export "${figures}/log_`t_`s's'_event_`samp'_`e'.pdf", as(pdf) replace
	
	* End of sectors loop
	}
	
	rename beta beta_`e'
	rename highci highci_`e'
	rename lowci lowci_`e'

	save ``e''
	
	export  delimited  using "${tables}/log_secondmeasure_estimates_`samp'_`e'.csv", replace
	
	restore
	}
	
	
	
	
	clear
	use `firstdeath', clear
	merge 1:1 event_time sector_code using `half_wtd', nogen

	keep if inrange(event_time,-10,10)

	* loop through sectors 
	levelsof sector_code, local(sectors)
	foreach s in `sectors' {
		* plot
				
		graph twoway ///
		 (scatter beta_half_wtd event_time  if  sector_code==`s', scale(*1.4) c(l)  msize(vsmall) color(gs7)  lpattern(shortdash) ) ///
		 (rarea highci_half_wtd lowci_half_wtd event_time  if  sector_code==`s',  color(gs7%15) xline(0,lcolor(gs8)) yline(0,lcolor(gs8))  ytitle("Log Spending per consumer ($)") ysc(titleg(0.2cm)) xtitle("Days relative to event") scheme(s1color) ti("", span linegap(0.01) size(medsmall))) ///
		  (scatter beta_firstdeath event_time  if  sector_code==`s' , c(l)  msize(vsmall) color(gs11)) ///
		 (rarea  highci_firstdeath lowci_firstdeath event_time  if  sector_code==`s', c(l) clpat(tight_dot) lw(medthick) ///
		 msize(vsmall) color(gs11%15)   legend(order(1 "Event: stay-at-home" 3 "Event: 1st death") row(1) region(lcolor(none)) ) ///
		    title("`t_`s''", span linegap(0.01) size(medsmall))) 	

		 graph export "${figures}/log_`t_`s's'_byevent_`samp'.pdf", replace
	
	* End of sectors loop
	}
